// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//-----------------------------------------------------------------------------
// WARNING: read all the warnings below before modifying this file!
//-----------------------------------------------------------------------------
//
// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.
//
// This schema is written using ProtoBuf syntax, but it is also used to generate
// a corresponding FlatBuffer schema.
//
// WARNING: The TfLiteSettings flatbuffer is used as part of the ABI
// for TensorFlow in Play Services, so please be careful to preserve
// binary backwards compatibility!
//
// WARNING: the Protobuf to Flatbuffer schema conversion does NOT
// pay any attention to the protobuf field numbers in this file,
// so setting the protobuf field numbers is NOT sufficient to preserve binary
// backwards compatibility.  Instead, to preserve backwards binary
// compatibility, new fields MUST ONLY be added at the END of messages,
// and fields should NEVER be deleted, but instead can only be deprecated.
//
// WARNING: before modifying this file, you should copy the previous contents
// of this file to 'testdata/configuration.proto_prev'.  This is used to test
// that your changes will preserve binary backwards compatibility.
//
// WARNING: you need to manually generate and update the generated flatbuffer
// code (configuration_generated.h) when modifying this file. See BUILD for
// more information. Below are manual steps for reference:
// bazel build
// //tensorflow/lite/acceleration/configuration:proto_to_flatbuffer
// && cp
// bazel-bin/tensorflow/lite/acceleration/configuration/configuration_generated.h
// tensorflow/lite/acceleration/configuration/configuration_generated.h
// NOTE: If you are a Google developer using the internal dev environment,
// please read the description of the following bash script and then run it:
// ./third_party/tensorflow/lite/acceleration/configuration/google/regenerate_tflite_configuration_generated_header.sh

// LINT.IfChange

syntax = "proto2";

package tflite.proto;

// ExecutionPreference is used to match accelerators against the preferences of
// the current application or usecase. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which usecases
// compatibility list entries have been developed for and what settings are used
// for NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Allowlist (semantically - value is same as
  // on input).
  ANY = 0;
  // Match low latency preference. Both compatibility list and input.
  LOW_LATENCY = 1;
  // Match low power preference. Both compatibility list and input.
  LOW_POWER = 2;
  // Never accelerate. Can be used for input to compatibility list or for
  // standalone Acceleration configuration.
  FORCE_CPU = 3;
}

// TFLite accelerator to use.
//
// STATUS: support library and the stable delegate loader settings are agnostic
// to the actual accelerator.
enum Delegate {
  // No delegate. ErrorCode.source uses this value for errors that come from
  // the TFLite framework itself rather than from a delegate.
  NONE = 0;

  // NNAPI delegate; configured through NNAPISettings.
  NNAPI = 1;
  // GPU delegate; configured through GPUSettings.
  GPU = 2;
  // Hexagon delegate; configured through HexagonSettings.
  HEXAGON = 3;
  // XNNPack delegate; configured through XNNPackSettings.
  XNNPACK = 4;
  // The EdgeTpu in Pixel devices.
  EDGETPU = 5;
  // The Coral EdgeTpu Dev Board / USB accelerator; configured through
  // CoralSettings.
  EDGETPU_CORAL = 6;
  // Apple CoreML; configured through CoreMLSettings.
  CORE_ML = 7;
  // Arm NN Delegate; configured through ArmNNSettings.
  ARMNN = 8;
}

// Execution preference to pass to NNAPI. Used by
// NNAPISettings.execution_preference.
enum NNAPIExecutionPreference {
  // Undefined.
  UNDEFINED = 0;
  // Prefer executing in a way that minimizes battery drain.
  NNAPI_LOW_POWER = 1;
  // Prefer returning a single answer as fast as possible, even if this causes
  // more power consumption.
  NNAPI_FAST_SINGLE_ANSWER = 2;
  // Prefer maximizing the throughput of successive frames, for example when
  // processing successive frames coming from the camera.
  NNAPI_SUSTAINED_SPEED = 3;
}

// Execution priority to pass to NNAPI. Used by
// NNAPISettings.execution_priority.
enum NNAPIExecutionPriority {
  // No priority specified.
  NNAPI_PRIORITY_UNDEFINED = 0;
  // NOTE(review): the exact scheduling effect of the levels below is defined
  // by NNAPI, not by this schema.
  NNAPI_PRIORITY_LOW = 1;
  NNAPI_PRIORITY_MEDIUM = 2;
  NNAPI_PRIORITY_HIGH = 3;
}

// One possible acceleration configuration.
message ComputeSettings {
  // Which preference to use this accelerator for.
  optional ExecutionPreference preference = 1;
  // How to configure TFLite.
  optional TFLiteSettings tflite_settings = 2;
  // Identifiers to use for instrumentation and telemetry: the namespace the
  // model belongs to ...
  optional string model_namespace_for_statistics = 3;
  // ... and the identifier of the model itself.
  optional string model_identifier_for_statistics = 4;

  // 'Maybe' acceleration: use mini-benchmark to select settings.
  optional MinibenchmarkSettings settings_to_test_locally = 5;
}

// NNAPI delegate settings.
message NNAPISettings {
  // Which instance (NNAPI accelerator) to use. One driver may provide several
  // accelerators (though a driver may also hide several back-ends behind one
  // name, at the choice of the driver vendor).
  // Note that driver introspection is only available in Android Q and later.
  optional string accelerator_name = 1;

  // Deprecated; use the compilation_caching_settings in TFLiteSettings.
  //
  // NNAPI model compilation caching settings to be passed to
  // tflite::StatefulNnApiDelegate.
  optional string cache_directory = 2 [deprecated = true];
  optional string model_token = 3 [deprecated = true];

  // NNAPI execution preference to pass. See
  // https://developer.android.com/ndk/reference/group/neural-networks.html
  optional NNAPIExecutionPreference execution_preference = 4;

  // Number of instances to cache for the same model (for input size
  // changes). This is mandatory for getting reasonable performance in that
  // case.
  optional int32 no_of_nnapi_instances_to_cache = 5;

  // Deprecated; use the fallback_settings in TFLiteSettings.
  //
  // Whether to automatically fall back to TFLite CPU path.
  optional FallbackSettings fallback_settings = 6 [deprecated = true];

  // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android
  // 10+ when an accelerator name is not specified. The NNAPI CPU typically
  // performs less well than the TfLite built-in kernels, but allowing it lets
  // a model be partially accelerated, which may be a win.
  optional bool allow_nnapi_cpu_on_android_10_plus = 7;

  // NNAPI execution priority to pass. See NNAPIExecutionPriority.
  optional NNAPIExecutionPriority execution_priority = 8;

  // Whether to allow dynamic dimension sizes without re-compilation.
  // A tensor with a dynamic dimension must have a valid dims_signature
  // defined.
  // Only supported in NNAPI 1.1 and newer versions.
  // WARNING: Setting this flag to true may result in the model being rejected
  // by the accelerator. This should only be enabled if the target device
  // supports dynamic dimensions of the model.
  // By default this is set to false.
  optional bool allow_dynamic_dimensions = 9;

  // Whether to allow the NNAPI accelerator to optionally use lower-precision
  // float16 (16-bit floating point) arithmetic when doing calculations on
  // float32 (32-bit floating point).
  optional bool allow_fp16_precision_for_fp32 = 10;

  // Whether to use NNAPI Burst mode.
  // Burst mode allows accelerators to efficiently manage resources, which
  // would significantly reduce overhead especially if the same delegate
  // instance is to be used for multiple inferences.
  optional bool use_burst_computation = 11;

  // Optional pointer, provided by the NNAPI Support Library, to an
  // NnApiSLDriverImplFL5 which can be used to construct the NNAPI delegate.
  // The pointer value is carried in this 64-bit integer field.
  optional int64 support_library_handle = 12;
}

// LINT.IfChange
// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and if it's not available fall back to OpenGL.
enum GPUBackend {
  // No backend is forced.
  UNSET = 0;
  // Force the OpenCL backend.
  OPENCL = 1;
  // Force the OpenGL backend.
  OPENGL = 2;
  // Not yet supported.
  // VULKAN = 3;
  // METAL = 4;
}

// GPU inference priorities define relative priorities given by the GPU delegate
// to different client needs.
// Corresponds to TfLiteGpuInferencePriority.
enum GPUInferencePriority {
  // Default for GPUSettings.inference_priority1/2/3. See GPUSettings for the
  // rules on which priority slots may be left as AUTO.
  GPU_PRIORITY_AUTO = 0;
  // NOTE(review): the values below mirror TfLiteGpuInferencePriority; their
  // exact semantics are defined by the GPU delegate, not by this schema.
  GPU_PRIORITY_MAX_PRECISION = 1;
  GPU_PRIORITY_MIN_LATENCY = 2;
  GPU_PRIORITY_MIN_MEMORY_USAGE = 3;
}

// GPU inference preference for initialization time vs. inference time.
// Corresponds to TfLiteGpuInferenceUsage.
enum GPUInferenceUsage {
  // Delegate will be used only once, therefore, bootstrap/init time should
  // be taken into account.
  // This is the first declared value, so it is the proto2 default for a field
  // of this type that declares no explicit default.
  GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0;

  // Prefer maximizing the throughput. Same delegate will be used repeatedly on
  // multiple inputs.
  GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1;
}

// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
  // Obsolete: Ignored if inference_priority1/2/3 are set.
  optional bool is_precision_loss_allowed = 1;
  // Whether quantized inference is enabled. Defaults to true.
  optional bool enable_quantized_inference = 2 [default = true];
  // GPU backend to force. See GPUBackend.
  optional GPUBackend force_backend = 3;

  // Ordered priorities provide better control over desired semantics,
  // where priority(n) is more important than priority(n+1). Therefore,
  // each time inference engine needs to make a decision, it uses
  // ordered priorities to do so.
  //
  // Default values correspond to GPU_PRIORITY_AUTO.
  // AUTO priority can only be used when higher priorities are fully specified.
  // For example:
  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //            priority3 = AUTO
  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //            priority3 = MAX_PRECISION
  // Invalid priorities will result in error.
  //
  // For more information, see TfLiteGpuDelegateOptionsV2.
  optional GPUInferencePriority inference_priority1 = 4
      [default = GPU_PRIORITY_AUTO];
  optional GPUInferencePriority inference_priority2 = 5
      [default = GPU_PRIORITY_AUTO];
  optional GPUInferencePriority inference_priority3 = 6
      [default = GPU_PRIORITY_AUTO];

  // Whether to optimize for compilation+execution time or execution time only.
  optional GPUInferenceUsage inference_preference = 7;

  // Model serialization. Setting both of these fields will also set the
  // TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION flag on the delegate.
  //
  // GPU model serialization directory passed in TfLiteGpuDelegateOptionsV2.
  // This should be set to the application's code cache directory so that it
  // cannot be accessed by other apps and is correctly deleted on app updates.
  optional string cache_directory = 8;
  // Normally, the model name with version number should be provided here, since
  // each model needs a unique ID to avoid cache collision.
  optional string model_token = 9;
}
// LINT.ThenChange(GpuAccelerationConfig.java)

// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
  // NOTE(review): the fields below are not documented in this schema;
  // presumably they mirror the like-named options in hexagon_delegate.h
  // (linked above) — confirm value ranges and defaults there.
  optional int32 debug_level = 1;
  optional int32 powersave_level = 2;
  optional bool print_graph_profile = 3;
  optional bool print_graph_debug = 4;
}

// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
enum XNNPackFlags {
  // These flags match the flags in xnnpack_delegate.h.
  // No flags set. This is the explicit default of XNNPackSettings.flags.
  TFLITE_XNNPACK_DELEGATE_NO_FLAGS = 0;
  // Enable fast signed integer XNNpack kernels.
  TFLITE_XNNPACK_DELEGATE_FLAG_QS8 = 1;
  // Enable fast unsigned integer XNNpack kernels.
  TFLITE_XNNPACK_DELEGATE_FLAG_QU8 = 2;
  // Enable both signed and unsigned integer XNNpack kernels. The value is the
  // bitwise OR of the QS8 and QU8 flag values.
  TFLITE_XNNPACK_DELEGATE_FLAG_QS8_QU8 = 3;
  // Force 16-bit floating point inference.
  TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16 = 4;
}

// Settings for the XNNPack delegate. Referenced from
// TFLiteSettings.xnnpack_settings.
message XNNPackSettings {
  // Number of threads for the delegate.
  // NOTE(review): the meaning of an unset or non-positive value is not shown
  // in this schema — confirm against xnnpack_delegate.h.
  optional int32 num_threads = 1;
  // Flags to pass to the delegate; defaults to no flags. See XNNPackFlags.
  optional XNNPackFlags flags = 2 [default = TFLITE_XNNPACK_DELEGATE_NO_FLAGS];
}

// CoreML Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/coreml/coreml_delegate.h
message CoreMLSettings {
  // Note the enum order change from the above header for better proto practice.
  enum EnabledDevices {
    // Always create Core ML delegate.
    DEVICES_ALL = 0;
    // Create Core ML delegate only on devices with Apple Neural Engine.
    DEVICES_WITH_NEURAL_ENGINE = 1;
  }
  // Selects on which devices the Core ML delegate is created: on all devices,
  // or only when Neural Engine is available on the device. See EnabledDevices.
  optional EnabledDevices enabled_devices = 1;

  // Specifies target Core ML version for model conversion.
  // Core ML 3 comes with a lot more ops, but some ops (e.g. reshape) are not
  // delegated due to input rank constraints.
  // If not set to one of the valid versions, the delegate will use the highest
  // version possible on the platform.
  // Valid versions: (2, 3)
  optional int32 coreml_version = 2;
  // This sets the maximum number of Core ML delegates created.
  // Each graph corresponds to one delegated node subset in the
  // TFLite model. Set this to 0 to delegate all possible partitions.
  optional int32 max_delegated_partitions = 3 [default = 0];
  // This sets the minimum number of nodes per partition delegated with
  // Core ML delegate. Defaults to 2.
  optional int32 min_nodes_per_partition = 4 [default = 2];
}

// Stable delegate loader settings.
//
// See
// tensorflow/lite/core/acceleration/configuration/c/stable_delegate.h
// An example stable delegate:
// tensorflow/lite/delegates/utils/experimental/sample_stable_delegate
message StableDelegateLoaderSettings {
  // The path of the stable delegate shared object file. The stable delegate
  // provider uses this path to dynamically load the shared object file.
  optional string delegate_path = 1;
  // Uniquely identifies a delegate. Format (snake_case): {vendor}_{delegate},
  // e.g. "google_edgetpu_delegate".
  optional string delegate_name = 2;
}

// Fields related to compilation caching.  In this context compilation caching
// refers to the concept of caching compilation artifacts that a delegate might
// produce when translating a model graph into a delegate-internal structure
// (for example, this could include compiled CPU code, or instructions for a
// separate accelerator chip such as a GPU, TPU, or DSP). Caching compilation
// artifacts can speed-up subsequent compilations, and hence the time it takes
// to apply a delegate.
//
// Compilation caching is an optional feature.  Setting these fields for a
// delegate that does not implement it will have no effect.
message CompilationCachingSettings {
  // The cache dir for the TFLite model.
  // If not set then the delegate should not try to cache the compilation.
  //
  // The delegate is responsible for synchronizing access to files in the
  // 'cache_dir'.  E.g., it is legal to create multiple threads or processes,
  // each of which has its own delegate instance, and provide the same
  // 'cache_dir' to those delegate instances.
  optional string cache_dir = 1;
  // The unique token string for a TFLite model.  A caller that wants the
  // delegate to cache the compilation should set this field.  If set then it is
  // the caller's responsibility to ensure there is no clash of the tokens.
  // E.g., if an app uses several models (with this delegate) on a given device,
  // then no two models should have the same model_token.  If no model token is
  // provided, but the 'cache_dir' is set, then the delegate might still cache
  // the compilation, e.g. by deriving a unique token internally, but this
  // behavior can be delegate-specific.
  //
  // NOTE: when using compilation caching, it is not recommended to use the
  // same delegate instance for multiple models.
  optional string model_token = 2;
}

// EdgeTPU device spec.
//
message EdgeTpuDeviceSpec {
  // EdgeTPU platform types.
  // NOTE(review): the individual values are not documented in this schema;
  // confirm their meaning with the EdgeTPU delegate owners.
  enum PlatformType {
    MMIO = 0;
    REFERENCE = 1;
    SIMULATOR = 2;
    REMOTE_SIMULATOR = 3;
  }

  // Execution platform for the EdgeTPU device.
  optional PlatformType platform_type = 1;

  // Number of chips to use for the EdgeTPU device.
  optional int32 num_chips = 2;

  // Paths to the EdgeTPU devices.
  repeated string device_paths = 3;

  // Chip family used by the EdgeTpu device.
  // NOTE(review): the valid integer values are not defined in this schema.
  optional int32 chip_family = 4;
}

// Generic definitions of EdgeTPU power states.
enum EdgeTpuPowerState {
  // Undefined power state.
  UNDEFINED_POWERSTATE = 0;

  // TPU core is off but control cluster is on.
  TPU_CORE_OFF = 1;

  // A non-active low-power state that has much smaller transition time to
  // active compared to off.
  READY = 2;

  // The ACTIVE_* states below are declared in increasing order of
  // performance and power, from ACTIVE_MIN_POWER up to OVER_DRIVE.

  // Minimum power active state.
  ACTIVE_MIN_POWER = 3;

  // Very low performance, very low power.
  ACTIVE_VERY_LOW_POWER = 4;

  // Low performance, low power.
  ACTIVE_LOW_POWER = 5;

  // The normal performance and power. This setting usually provides the
  // optimal perf/power trade-off for the average use-case.
  ACTIVE = 6;

  // Maximum performance level. Potentially higher power and thermal. This
  // setting may not be allowed in production depending on the system.
  OVER_DRIVE = 7;
}

// Pairs an inactive power state with a timeout. Used as the element type of
// the repeated field EdgeTpuSettings.inactive_power_configs.
message EdgeTpuInactivePowerConfig {
  // Inactive power state between inferences.
  optional EdgeTpuPowerState inactive_power_state = 1;

  // Inactive timeout in microseconds between inferences.
  optional int64 inactive_timeout_us = 2;
}

// EdgeTPU Delegate settings.
//
// For security reasons, only certain apps that are part of the platform's
// trusted code base are permitted to use the features defined in this message.
// General apps should use `GoogleEdgeTpuSettings` instead.
message EdgeTpuSettings {
  // Float truncation types for EdgeTPU.
  // NOTE(review): the individual values are not documented in this schema.
  enum FloatTruncationType {
    UNSPECIFIED = 0;
    NO_TRUNCATION = 1;
    BFLOAT16 = 2;
    HALF = 3;
  }

  // Quality-of-service classes; see the qos_class field.
  enum QosClass {
    QOS_UNDEFINED = 0;
    BEST_EFFORT = 1;
    REALTIME = 2;
  }

  // Target inference power state for running the model.
  optional EdgeTpuPowerState inference_power_state = 1;

  // Inactive power states between inferences, each with its own timeout.
  repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2;

  // Priority for the inference request. Defaults to -1.
  // NOTE(review): the meaning of -1 and the valid range are not shown here.
  optional int32 inference_priority = 3 [default = -1];

  // Device spec for creating the EdgeTpu device.
  optional EdgeTpuDeviceSpec edgetpu_device_spec = 4;

  // A unique identifier of the input TfLite model.
  optional string model_token = 5;

  // Float truncation type for EdgeTPU.
  optional FloatTruncationType float_truncation_type = 6;

  // QoS class to determine chunking size for PRO onward.
  // Defaults to QOS_UNDEFINED.
  optional QosClass qos_class = 7 [default = QOS_UNDEFINED];

  // Cluster IDs the model will be compiled for.
  repeated int32 hardware_cluster_ids = 8 [packed = true];

  // Public model ID to be logged in logs, traces and metrics for identifying
  // the model to help debugging.
  // The configured string must obey the following rules:
  // 1. Must not contain any confidential information, because public_model_id
  // will be logged in android logs and traces which are publicly visible.
  // 2. Must not contain any private user data or PII (Personally Identifiable
  // Information), such as age, language, geography, religion, etc.
  // 3. Should be <=30 chars, otherwise EdgeTpu software might truncate the
  // string due to logging size constraints.
  // 4. Please try to use a unique name so that it's easier to identify the
  // model during debugging.
  optional string public_model_id = 9;
}

// Google EdgeTPU delegate settings.
message GoogleEdgeTpuSettings {
  // Execution priority levels; see the priority field.
  enum Priority {
    PRIORITY_UNDEFINED = 0;
    PRIORITY_LOW = 1;
    PRIORITY_MEDIUM = 2;
    PRIORITY_HIGH = 3;
  }

  // Three-valued boolean, used by the prefer_cache_coherency_* fields so that
  // "not specified" can be told apart from an explicit false.
  enum TriState {
    TRISTATE_UNDEFINED = 0;
    TRISTATE_FALSE = 1;
    TRISTATE_TRUE = 2;
  }

  // Controls the verbosity level of the delegate log messages. Set to -1 to let
  // the delegate choose. Otherwise, it must range from 0 to 10 (inclusive),
  // where lower values indicate less verbosity. A higher verbosity level may
  // have an adverse impact on the delegate performance.
  optional int32 log_verbosity = 1 [default = -1];

  // Whether or not the client requests detailed delegate traces.
  // The resulting traces can be used for performance analysis with tools such
  // as perfetto (https://perfetto.dev/docs/quickstart/android-tracing).
  // Enabling tracing may have an adverse impact on the delegate performance.
  optional bool enable_tracing = 2 [default = false];

  // Specifies the execution priority. The priority is global. Requests from
  // different clients are prioritized relative to one another.
  optional Priority priority = 3;

  // Reserved.
  optional bytes extension_data = 4;

  // A unique identifier of the input model. Creating delegates with different
  // user model binaries with the same model identifier will overwrite
  // previously cached entries, saving disk space.
  // If this field is not set, the model will be treated as a new entry, and
  // will cost disk space to cache.
  // This field is different from the model_token in CompilationCachingSettings,
  // where the users may reuse the same model_identifier for different flavors
  // of the same model to save disk space, whereas model_token must be unique.
  // Example usage:
  // (1) An app only uses one model, and wants to update the model.
  // For both the existing and new models, set: model_identifier = "my_model"
  // Creating the delegate with the new entry this way will delete the old
  // cache entry, and replace it with the new version of "my_model"
  // (2) An app A/B tests two versions of the same model (e.g. a stable version
  //  and a testing/staging/beta version), and wants to frequently switch
  // between them.
  // The clients should use different model_identifier for the two variants.
  // Model_A: model_identifier = "my_model_a"
  // Model_B: model_identifier = "my_model_b"
  // Both Model A and B will be cached separately, and coexist for efficient
  // lookups.
  optional string model_identifier = 5 [default = ""];

  // If set to true, the user must use TFLite Async API to run the inference.
  optional bool use_async_api = 6 [default = false];

  // Specifies whether or not the delegate should handle cache management for
  // the imported input or output buffers with TFLite Async API. These
  // options have no effect if the user is not using the TFLite Async API.
  // Both default to true.
  optional bool delegate_should_manage_cache_for_inputs = 7 [default = true];
  optional bool delegate_should_manage_cache_for_outputs = 8 [default = true];

  // Specifies whether or not cache coherency is preferred for the imported
  // input or output buffers with TFLite Async API. These options are purely
  // advisory. Even if the user specifies that cache coherency is preferred,
  // the delegate may still choose to use cache incoherent memory under certain
  // circumstances, e.g. hardware limitation. If it is set to
  // TRISTATE_UNDEFINED, the delegate will use the default value based on the
  // device type. These options have no effect if the user is not using the
  // TFLite Async API.
  optional TriState prefer_cache_coherency_for_inputs = 9;
  optional TriState prefer_cache_coherency_for_outputs = 10;

  // Whether to allow the accelerator to optionally use lower-precision
  // float16 (16-bit floating point) arithmetic when doing calculations on
  // float32 (32-bit floating point). Defaults to false.
  optional bool allow_fp16_precision_for_fp32 = 11 [default = false];
}

// Coral Dev Board / USB accelerator delegate settings.
//
// See
// https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h
message CoralSettings {
  // Performance levels selectable through the performance field.
  enum Performance {
    UNDEFINED = 0;
    MAXIMUM = 1;
    HIGH = 2;
    MEDIUM = 3;
    LOW = 4;
  }

  // The Edge Tpu device to be used. See
  // https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137
  optional string device = 1;
  // The desired performance level. This setting adjusts the internal clock
  // rate to achieve different performance / power balance. Higher performance
  // values improve speed, but increase power usage. Defaults to MAXIMUM.
  optional Performance performance = 2 [default = MAXIMUM];
  // If true, always perform device firmware update (DFU) after reset. DFU is
  // usually only necessary after power cycle.
  optional bool usb_always_dfu = 3;
  // The maximum bulk in queue length. Larger queue length may improve USB
  // performance on the direction from device to host. When not specified (or
  // zero), `usb_max_bulk_in_queue_length` will default to 32 according to the
  // current EdgeTpu Coral implementation.
  optional int32 usb_max_bulk_in_queue_length = 4;
}

// How to configure CPU execution. Referenced from
// TFLiteSettings.cpu_settings.
message CPUSettings {
  // Number of threads to use. Set to -1 (the default) to let the interpreter
  // choose. Otherwise, must be > 0.
  optional int32 num_threads = 1 [default = -1];
}

// Arm NN Delegate Settings.
// More information about Arm NN delegate options can be found in
// https://arm-software.github.io/armnn/latest/delegate.xhtml#delegateoptions
message ArmNNSettings {
  // A comma-separated list, without whitespace, of the backends that should
  // be used for execution. Falls back to the next backend in the list if the
  // previous one does not provide support for an operation.
  optional string backends = 1;
  // Allows the use of optimisation techniques, e.g. Winograd, that
  // will reduce execution time with the possibility of a drop in accuracy.
  optional bool fastmath = 2;
  // Additional Arm NN delegate options. See
  // https://arm-software.github.io/armnn/latest/delegate.xhtml#delegateoptions
  optional string additional_parameters = 3;
}

// How to configure TFLite.
message TFLiteSettings {
  // NOTE: the fields below are not declared in field-number order (e.g. 11
  // appears between 5 and 6, and 10 before 9). Per the warnings at the top of
  // this file, the FlatBuffer conversion ignores protobuf field numbers, so
  // existing fields must not be reordered and new fields must only be added
  // at the end of the message.

  // Which delegate to use.
  optional Delegate delegate = 1;

  // How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into a
  // nested anonymous table rather than a union. See
  // https://github.com/google/flatbuffers/issues/4628).
  optional NNAPISettings nnapi_settings = 2;
  optional GPUSettings gpu_settings = 3;
  optional HexagonSettings hexagon_settings = 4;
  optional XNNPackSettings xnnpack_settings = 5;
  optional CoreMLSettings coreml_settings = 11;

  // How to configure CPU execution.
  optional CPUSettings cpu_settings = 6;

  // Shared delegation settings.
  optional int32 max_delegated_partitions = 7;

  // For configuring the EdgeTpuDelegate.
  // See also `google_edgetpu_settings` below.
  optional EdgeTpuSettings edgetpu_settings = 8;

  // For configuring the Coral EdgeTpu Delegate.
  optional CoralSettings coral_settings = 10;

  // Whether to automatically fall back to TFLite CPU path.
  optional FallbackSettings fallback_settings = 9;

  // Whether to disable default delegates (XNNPack).
  // TODO(b/260405596): Update the comment to clarify the interaction between
  // `disable_default_delegates` and `fallback_settings`.
  optional bool disable_default_delegates = 12;

  // For loading a stable delegate. If an app supplies a delegate shared library
  // (e.g. packaged with the app, or downloaded separately), the app can use
  // this field for passing the path to the delegate shared library.
  //
  // The stable delegate loader settings field works together with the settings
  // of other concrete stable delegates; the stable delegate loader is not a
  // concrete delegate type but a mechanism for initializing the TF Lite stable
  // delegates.
  //
  // See
  // tensorflow/lite/delegates/utils/experimental/sample_stable_delegate
  optional StableDelegateLoaderSettings stable_delegate_loader_settings = 13;

  // For configuring the Google EdgeTpu Delegate.
  optional GoogleEdgeTpuSettings google_edgetpu_settings = 14;

  // Compilation caching settings.
  optional CompilationCachingSettings compilation_caching_settings = 15;

  // For configuring the Arm NN delegate.
  optional ArmNNSettings armnn_settings = 16;
}

// Whether to automatically fallback to TFLite CPU path on delegation errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
  // NOTE(review): field numbers in this message start at 7 rather than 1; the
  // reason is not shown in this file. Do not renumber them.

  // Whether to allow automatically falling back to TfLite CPU path on
  // compilation failure. Default is not allowing automatic fallback.
  //
  // This is useful in naive production usecases where the caller would prefer
  // for the model to run even if it's not accelerated. More advanced users will
  // implement fallback themselves; e.g., by using a different model on CPU.
  //
  // Note that compilation errors may occur either at initial
  // ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
  // resizing.
  optional bool allow_automatic_fallback_on_compilation_error = 7;
  // Whether to allow automatically falling back to TfLite CPU path on
  // execution error. Default is not allowing automatic fallback.
  //
  // Experimental, use with care (only when you have complete control over the
  // client code).
  //
  // The caveat above for compilation error holds.  Additionally, execution-time
  // errors are harder to handle automatically as they require invalidating the
  // TfLite interpreter which most client code has not been designed to deal
  // with.
  optional bool allow_automatic_fallback_on_execution_error = 8;
}

// On-device mini-benchmark result storage. The following definitions are used
// to keep an append-only log of benchmark results on-device. (Hence there is
// a single top-level event that is used for all data.)
//
// These definitions don't need a proto-to-flatbuffer conversion, since they are
// not used for specifying configuration in the Tasks library.

// Which stage of benchmarking the event is for.
// There might be multiple events with the same type, if a benchmark is run
// multiple times.
enum BenchmarkEventType {
  // Event type not set.
  UNDEFINED_BENCHMARK_EVENT_TYPE = 0;
  // Benchmark start. A start without an end can be interpreted as a test that
  // has crashed or hung.
  START = 1;
  // Benchmarking completion. A model was successfully loaded, acceleration
  // configured and inference run without errors. There may still be an issue
  // with correctness of results, or with performance.
  END = 2;
  // Benchmark was not completed due to an error. The error may be a handled
  // error (e.g., failure in a delegate), or a crash.
  ERROR = 3;
  // Benchmark data has been sent for logging.
  LOGGED = 4;
  // Benchmark encountered an error but was able to continue. The error is not
  // related to the model execution but to the mini-benchmark logic. An example
  // of such an error is a failure when trying to set the CPU affinity of the
  // benchmark runner process.
  RECOVERED_ERROR = 5;
}

// A correctness metric from a benchmark, for example KL-divergence between
// known-good CPU output and on-device output. These are primarily used for
// telemetry and monitored server-side.
message BenchmarkMetric {
  // Name of the metric.
  optional string name = 1;
  // Values recorded for the metric.
  // NOTE(review): what each element corresponds to (e.g. one per test input
  // or one per output) is not shown in this schema — confirm with the
  // producer.
  repeated float values = 2 [packed = true];
}

// Outcome of a successfully completed benchmark run. This information is
// intended both to be used on-device to select the best compute configuration
// and to be sent to the server for monitoring.
//
// Used with event type END.
// Next ID: 7
message BenchmarkResult {
  // Time to load model and apply acceleration. Initialization may get run
  // multiple times to get information on variance.
  repeated int64 initialization_time_us = 1 [packed = true];
  // Time to run inference (call Invoke()). Inference may get run multiple times
  // to get information on variance.
  repeated int64 inference_time_us = 2 [packed = true];
  // Maximum memory used. Measures size of application heap (does not
  // necessarily take into account driver-side allocation).
  optional int32 max_memory_kb = 3;
  // Whether the inference produced correct results (validation graph output
  // 'ok' for all test inputs). Used on-device to disallow configurations that
  // produce incorrect results (e.g., due to OpenCL driver bugs).
  optional bool ok = 4;
  // Metrics that were used to determine the 'ok' status.
  repeated BenchmarkMetric metrics = 5;

  // Raw content of a single output tensor.
  message InferenceOutput {
    // The matching Flatbuffer type is ubyte.
    optional bytes value = 1;
  }
  // Model output in byte format. Each InferenceOutput comes from one output
  // tensor. It is ordered the same as tflite::Interpreter::output_tensor(),
  // i.e. the value of output_tensor(i) is stored in actual_output[i]. Only
  // populated in the custom validation case.
  repeated InferenceOutput actual_output = 6;
}

// A handled error.
message ErrorCode {
  // Which delegate the error comes from (or NONE, if it comes from the tflite
  // framework).
  optional Delegate source = 1;
  // What the tflite level error is.
  // NOTE(review): presumably a TfLiteStatus value stored as an integer --
  // confirm against the code that populates this field.
  optional int32 tflite_error = 2;
  // What the underlying error is (e.g., NNAPI or OpenGL error).
  optional int64 underlying_api_error = 3;
}

// When during benchmark execution an error occurred.
enum BenchmarkStage {
  // Zero value; the stage is not known or has not been specified.
  UNKNOWN = 0;
  // During model loading or delegation.
  INITIALIZATION = 1;
  // During inference.
  INFERENCE = 2;
}

// An error that occurred during benchmarking.
//
// Used with event type ERROR.
message BenchmarkError {
  // How far benchmarking got.
  optional BenchmarkStage stage = 1;
  // Process exit code.
  // NOTE(review): presumably the exit code of the benchmark runner process --
  // confirm.
  optional int32 exit_code = 2;
  // Signal the process received.
  optional int32 signal = 3;
  // Handled tflite errors. May hold more than one entry.
  repeated ErrorCode error_code = 4;
  // Mini-benchmark error code.
  // NOTE(review): the set of valid codes is defined outside this file.
  optional int32 mini_benchmark_error_code = 5;
}

// Top-level benchmarking event stored on-device. All events for a model are
// parsed to detect the status.
message BenchmarkEvent {
  // Which settings were used for benchmarking.
  optional TFLiteSettings tflite_settings = 1;
  // Type of the event.
  optional BenchmarkEventType event_type = 2;
  // Result of benchmark, used when type is END.
  optional BenchmarkResult result = 3;
  // Error during benchmark, used when type is ERROR.
  optional BenchmarkError error = 4;
  // Start timestamps. These are used for
  // 1. Checking whether a test was started but not completed within a given
  // deadline.
  // 2. Optionally, telemetry timestamps.
  //
  // Start timestamp on the boot-time clock.
  // NOTE(review): presumably microseconds since device boot -- confirm.
  optional int64 boottime_us = 5;
  // Start timestamp on the wall clock.
  // NOTE(review): presumably microseconds since the Unix epoch -- confirm.
  optional int64 wallclock_us = 6;
}

// Represents the decision on the best acceleration from the mini-benchmark.
message BestAccelerationDecision {
  // Number of events used to take the decision.
  // Using just the size instead of the full list of events to save space.
  optional int32 number_of_source_events = 1;

  // Event with the minimum latency among the source events.
  optional BenchmarkEvent min_latency_event = 2;

  // Min latency as read from min_latency_event.
  optional int64 min_inference_time_us = 3;
}

// Represents a failure during the initialization of the mini-benchmark.
message BenchmarkInitializationFailure {
  // Status code returned by the mini-benchmark initialization function.
  optional int32 initialization_status = 1;
}

// Events generated by the mini-benchmark before and after triggering
// the different configuration-specific benchmarks.
message MiniBenchmarkEvent {
  // Not using oneof because of the way the C++ code is generated.
  // See comment above on TfLite settings for details.

  // If set to true, this event is used to mark all previous events in the
  // mini-benchmark internal storage as read and one of the other fields
  // in this message will have a value.
  // NOTE(review): a pure flushing marker would be expected to carry no other
  // payload; confirm whether "one of" above is intended or should read
  // "none of".
  optional bool is_log_flushing_event = 1;
  // Event generated when a best acceleration decision is taken.
  optional BestAccelerationDecision best_acceleration_decision = 2;
  // Reports a failure during mini-benchmark initialization.
  optional BenchmarkInitializationFailure initialization_failure = 3;
  // Event generated while benchmarking the different settings to test locally.
  optional BenchmarkEvent benchmark_event = 4;
}

// How to access the model for mini-benchmark.
// Mini-benchmark can read the model from a file path, a file
// descriptor, or an in-memory model. The file descriptor typically comes from
// the Android asset manager. Since mini-benchmark runs in a separate process,
// it can not access the in-memory model directly. Instead, it will copy the
// in-memory model to the validation process.
//
// Users should set one of the following:
// 1) filename, or
// 2) all of fd, offset (optional, default to 0) and length, or
// 3) both buffer_handle and length.
message ModelFile {
  // Filename for reading model from.
  optional string filename = 1;
  // File descriptor to read model from.
  optional int64 fd = 2;
  // Offset for model in file descriptor.
  optional int64 offset = 3;
  // Length of model.
  optional int64 length = 4;
  // Namespace and ID identifying the model; see ModelIdGroup.
  optional ModelIdGroup model_id_group = 5;
  // In-memory buffer handle to the model. This handle will be cast to a pointer
  // of type const uint8_t* to load the model. The caller needs to ensure the
  // buffer handle out-lives the mini-benchmark main process.
  // NOTE: When using buffer_handle, this proto should not be serialized and
  // copied across process boundaries (e.g. via a file), since it may contain
  // handles that refer to addresses in the current process's address space.
  optional int64 buffer_handle = 6;
}

// Identifies a model by a namespace and an ID.
message ModelIdGroup {
  // Namespace the model belongs to.
  // NOTE(review): presumably scopes model_id so that IDs need only be unique
  // within a namespace -- confirm.
  optional string model_namespace = 1;
  // ID of the model.
  optional string model_id = 2;
}

// Where to store mini-benchmark state.
message BenchmarkStoragePaths {
  // Base path to the files used to store benchmark results in. Two files
  // will be generated: one with the given path and an extra file to store
  // events related to best acceleration results at path storage_file_path +
  // ".extra.fb". Must be specific to the model.
  // Note: on Android, this should be the code cache directory.
  optional string storage_file_path = 1;

  // Path to a directory for intermediate files (lock files, extracted
  // binaries).
  // Note: on Android, this typically is the data cache directory (i.e. the one
  // returned by `getCacheDir()`).
  optional string data_directory_path = 2;
}

// Validation related settings.
// Next ID: 2
message ValidationSettings {
  // Timeout for one setting under test. If the test didn't finish within this
  // timeout, the setting is considered hanging.
  optional int64 per_test_timeout_ms = 1;
}

// How to run a minibenchmark.
// Next ID: 5
message MinibenchmarkSettings {
  // Which settings to test. This would typically be filled in from an
  // allowlist.
  repeated TFLiteSettings settings_to_test = 1;
  // How to access the model. This would typically be set dynamically, as it
  // depends on the application folder and/or runtime state.
  // NOTE: When using buffer_handle, this proto should not be serialized and
  // copied across process boundaries (e.g. via a file), since it may contain
  // handles that refer to addresses in the current process's address space.
  optional ModelFile model_file = 2;
  // Where to store state. This would typically be set dynamically, as it
  // depends on the application folder.
  optional BenchmarkStoragePaths storage_paths = 3;
  // Validation test related settings.
  optional ValidationSettings validation_settings = 4;
}

// Schema used for caching a benchmark result.
message BenchmarkEventStorage {
  // Namespace and ID of a model.
  // NOTE(review): presumably the model that benchmark_event was recorded
  // for -- confirm.
  optional ModelIdGroup model_id_group = 1;
  // The cached benchmark event.
  optional BenchmarkEvent benchmark_event = 2;
}

// LINT.ThenChange(//tensorflow/lite/acceleration/configuration/testdata/configuration.proto_prev)
